{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ece4cfc7-6d7a-499c-806e-5b42f21d2082",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "765acac1-eb83-4e86-a94e-6200e7851e4c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   ListingId   借款金额  借款期限  借款利率     借款成功日期 初始评级 借款类型 是否首标  年龄 性别  ...   户口认证  \\\n",
      "0     126541  18000    12  18.0   2015/5/4    C   其他    否 NaN  男  ...  未成功认证   \n",
      "1     133291   9453    12  20.0  2015/3/16    D   其他    否 NaN  男  ...   成功认证   \n",
      "2     142421  27000    24  20.0  2016/4/26    E   普通    否 NaN  男  ...  未成功认证   \n",
      "3     149711  25000    12  18.0  2015/3/30    C   其他    否 NaN  男  ...   成功认证   \n",
      "4     152141  20000     6  16.0  2015/1/22    C   电商    否 NaN  男  ...   成功认证   \n",
      "\n",
      "    视频认证   学历认证   征信认证   淘宝认证 历史成功借款次数  历史成功借款金额     总待还本金  历史正常还款期数  历史逾期还款期数  \n",
      "0   成功认证  未成功认证  未成功认证  未成功认证       11     40326   8712.73      57.0      16.0  \n",
      "1  未成功认证  未成功认证  未成功认证  未成功认证        4     14500   7890.64      13.0       1.0  \n",
      "2  未成功认证  未成功认证  未成功认证  未成功认证        5     21894  11726.32      25.0       3.0  \n",
      "3   成功认证  未成功认证  未成功认证  未成功认证        6     36190   9703.41      41.0       1.0  \n",
      "4   成功认证  未成功认证  未成功认证  未成功认证       13     77945      0.00     118.0      14.0  \n",
      "\n",
      "[5 rows x 21 columns]\n"
     ]
    }
   ],
   "source": [
    "# ***************** 2. Data preprocessing *****************\n",
    "# Plotting imports for this section. matplotlib is imported directly instead of\n",
    "# through the pylab namespace; both names refer to the same module.\n",
    "import matplotlib as mpl\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "# 2.1 Load the data\n",
    "df = pd.read_csv(\"train.csv\")\n",
    "\n",
    "# A bare `df.describe()` in the middle of a cell is evaluated and discarded,\n",
    "# so the summary is printed explicitly, like the preview that follows it.\n",
    "print(df.describe())\n",
    "print(df.head())\n",
    "\n",
    "# Select the SimHei font (presumably so the Chinese column names render in plots)\n",
    "# and draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.\n",
    "mpl.rcParams[\"font.sans-serif\"] = [\"SimHei\"]\n",
    "mpl.rcParams[\"axes.unicode_minus\"] = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "22900e15-575e-412c-a416-91318f8306f3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ListingId      0\n",
      "借款金额           0\n",
      "借款期限           0\n",
      "借款利率           0\n",
      "借款成功日期         0\n",
      "初始评级           0\n",
      "借款类型           0\n",
      "是否首标           0\n",
      "年龄           301\n",
      "性别             0\n",
      "手机认证           0\n",
      "户口认证           0\n",
      "视频认证           0\n",
      "学历认证           0\n",
      "征信认证           0\n",
      "淘宝认证           0\n",
      "历史成功借款次数       0\n",
      "历史成功借款金额       0\n",
      "总待还本金          0\n",
      "历史正常还款期数      18\n",
      "历史逾期还款期数      83\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 2.2 Missing values: count the nulls in each column of the loaded frame\n",
    "data_na = df.isna().sum(axis=0)\n",
    "print(data_na)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3eae574e-21c3-4800-8262-9705db358f3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "088d9a0f-ec39-453a-9f8c-922e2a1a3a33",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "22d6bb12-80c3-4ba5-bdeb-c1f83902f53f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Disabled alternative to dropping rows: fill the missing values instead.\n",
    "# The fill labels below use the spelling found in the data ('成功认证' / '未成功认证').\n",
    "# df['借款类型']=df['借款类型'].fillna('其他')\n",
    "# df['是否首标']=df['是否首标'].fillna('否')\n",
    "# df['年龄']=df['年龄'].fillna(df['年龄'].mean())\n",
    "# df['性别']=df['性别'].fillna('男')\n",
    "# df['手机认证']=df['手机认证'].fillna('成功认证')\n",
    "# df['户口认证']=df['户口认证'].fillna('成功认证')\n",
    "# df['视频认证']=df['视频认证'].fillna('成功认证')\n",
    "# df['学历认证']=df['学历认证'].fillna('成功认证')\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9956411a-5f1b-4784-947f-38ae7cbccacd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build `data` from `df` by dropping every row that has a missing value in any\n",
    "# of the listed columns. A single call with the full subset removes a row as soon\n",
    "# as one listed column is null; each column is listed exactly once.\n",
    "data = df.dropna(\n",
    "    subset=[\n",
    "        '借款类型',\n",
    "        '是否首标',\n",
    "        '年龄',\n",
    "        '性别',\n",
    "        '手机认证',\n",
    "        '户口认证',\n",
    "        '视频认证',\n",
    "        '学历认证',\n",
    "        '征信认证',\n",
    "        '淘宝认证',\n",
    "        '历史正常还款期数',\n",
    "        '历史逾期还款期数',\n",
    "    ],\n",
    "    axis=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "43ab522c-5148-4dd7-8430-e3c46c6726f0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ListingId    0\n",
      "借款金额         0\n",
      "借款期限         0\n",
      "借款利率         0\n",
      "借款成功日期       0\n",
      "初始评级         0\n",
      "借款类型         0\n",
      "是否首标         0\n",
      "年龄           0\n",
      "性别           0\n",
      "手机认证         0\n",
      "户口认证         0\n",
      "视频认证         0\n",
      "学历认证         0\n",
      "征信认证         0\n",
      "淘宝认证         0\n",
      "历史成功借款次数     0\n",
      "历史成功借款金额     0\n",
      "总待还本金        0\n",
      "历史正常还款期数     0\n",
      "历史逾期还款期数     0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 2.2 Missing values: count the nulls in each column of `data` (the frame left after dropna)\n",
    "da_na = data.isna().sum(axis=0)\n",
    "print(da_na)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "56eb1ff2-3c73-48ff-a6d6-1e65df07c8e0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ListingId</th>\n",
       "      <th>借款金额</th>\n",
       "      <th>借款期限</th>\n",
       "      <th>借款利率</th>\n",
       "      <th>借款成功日期</th>\n",
       "      <th>初始评级</th>\n",
       "      <th>借款类型</th>\n",
       "      <th>是否首标</th>\n",
       "      <th>年龄</th>\n",
       "      <th>性别</th>\n",
       "      <th>...</th>\n",
       "      <th>户口认证</th>\n",
       "      <th>视频认证</th>\n",
       "      <th>学历认证</th>\n",
       "      <th>征信认证</th>\n",
       "      <th>淘宝认证</th>\n",
       "      <th>历史成功借款次数</th>\n",
       "      <th>历史成功借款金额</th>\n",
       "      <th>总待还本金</th>\n",
       "      <th>历史正常还款期数</th>\n",
       "      <th>历史逾期还款期数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ListingId</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.009941</td>\n",
       "      <td>-0.022482</td>\n",
       "      <td>0.126414</td>\n",
       "      <td>0.965850</td>\n",
       "      <td>-0.052551</td>\n",
       "      <td>-0.188825</td>\n",
       "      <td>0.118887</td>\n",
       "      <td>0.013310</td>\n",
       "      <td>-0.138986</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.092384</td>\n",
       "      <td>0.177651</td>\n",
       "      <td>0.282563</td>\n",
       "      <td>0.205959</td>\n",
       "      <td>0.247217</td>\n",
       "      <td>-0.052282</td>\n",
       "      <td>-0.026267</td>\n",
       "      <td>-0.036009</td>\n",
       "      <td>-0.061549</td>\n",
       "      <td>-0.080047</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>借款金额</th>\n",
       "      <td>-0.009941</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.011362</td>\n",
       "      <td>-0.333065</td>\n",
       "      <td>-0.010972</td>\n",
       "      <td>-0.269202</td>\n",
       "      <td>0.093424</td>\n",
       "      <td>-0.036839</td>\n",
       "      <td>0.110788</td>\n",
       "      <td>0.010539</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.087175</td>\n",
       "      <td>-0.044062</td>\n",
       "      <td>0.026782</td>\n",
       "      <td>-0.008029</td>\n",
       "      <td>-0.098078</td>\n",
       "      <td>0.013808</td>\n",
       "      <td>0.312869</td>\n",
       "      <td>0.202477</td>\n",
       "      <td>0.017565</td>\n",
       "      <td>0.000925</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>借款期限</th>\n",
       "      <td>-0.022482</td>\n",
       "      <td>0.011362</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.123350</td>\n",
       "      <td>0.014799</td>\n",
       "      <td>-0.107753</td>\n",
       "      <td>0.007985</td>\n",
       "      <td>0.018129</td>\n",
       "      <td>-0.034883</td>\n",
       "      <td>0.018869</td>\n",
       "      <td>...</td>\n",
       "      <td>0.092187</td>\n",
       "      <td>0.067910</td>\n",
       "      <td>-0.040750</td>\n",
       "      <td>-0.067393</td>\n",
       "      <td>-0.006896</td>\n",
       "      <td>-0.005927</td>\n",
       "      <td>-0.052462</td>\n",
       "      <td>-0.004237</td>\n",
       "      <td>0.012162</td>\n",
       "      <td>0.029808</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>借款利率</th>\n",
       "      <td>0.126414</td>\n",
       "      <td>-0.333065</td>\n",
       "      <td>-0.123350</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.090037</td>\n",
       "      <td>0.794554</td>\n",
       "      <td>-0.095491</td>\n",
       "      <td>0.333095</td>\n",
       "      <td>-0.148333</td>\n",
       "      <td>0.035241</td>\n",
       "      <td>...</td>\n",
       "      <td>0.024707</td>\n",
       "      <td>-0.065700</td>\n",
       "      <td>0.126911</td>\n",
       "      <td>0.098251</td>\n",
       "      <td>0.123668</td>\n",
       "      <td>-0.131935</td>\n",
       "      <td>-0.143293</td>\n",
       "      <td>-0.221464</td>\n",
       "      <td>-0.147881</td>\n",
       "      <td>-0.020625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>借款成功日期</th>\n",
       "      <td>0.965850</td>\n",
       "      <td>-0.010972</td>\n",
       "      <td>0.014799</td>\n",
       "      <td>0.090037</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.073839</td>\n",
       "      <td>-0.194619</td>\n",
       "      <td>0.101912</td>\n",
       "      <td>0.018291</td>\n",
       "      <td>-0.152662</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.022511</td>\n",
       "      <td>0.244189</td>\n",
       "      <td>0.227505</td>\n",
       "      <td>0.151380</td>\n",
       "      <td>0.223851</td>\n",
       "      <td>-0.049142</td>\n",
       "      <td>-0.027272</td>\n",
       "      <td>-0.038997</td>\n",
       "      <td>-0.057747</td>\n",
       "      <td>-0.071668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>初始评级</th>\n",
       "      <td>-0.052551</td>\n",
       "      <td>-0.269202</td>\n",
       "      <td>-0.107753</td>\n",
       "      <td>0.794554</td>\n",
       "      <td>-0.073839</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.064861</td>\n",
       "      <td>0.321151</td>\n",
       "      <td>-0.122771</td>\n",
       "      <td>0.093920</td>\n",
       "      <td>...</td>\n",
       "      <td>0.052067</td>\n",
       "      <td>-0.043784</td>\n",
       "      <td>0.073821</td>\n",
       "      <td>0.051100</td>\n",
       "      <td>0.067633</td>\n",
       "      <td>-0.129535</td>\n",
       "      <td>-0.120368</td>\n",
       "      <td>-0.199208</td>\n",
       "      <td>-0.149165</td>\n",
       "      <td>0.010953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>借款类型</th>\n",
       "      <td>-0.188825</td>\n",
       "      <td>0.093424</td>\n",
       "      <td>0.007985</td>\n",
       "      <td>-0.095491</td>\n",
       "      <td>-0.194619</td>\n",
       "      <td>-0.064861</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.067647</td>\n",
       "      <td>0.032219</td>\n",
       "      <td>0.049123</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.093780</td>\n",
       "      <td>-0.115544</td>\n",
       "      <td>0.002210</td>\n",
       "      <td>-0.009253</td>\n",
       "      <td>-0.073750</td>\n",
       "      <td>0.011290</td>\n",
       "      <td>0.078866</td>\n",
       "      <td>0.041561</td>\n",
       "      <td>0.020679</td>\n",
       "      <td>0.055437</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否首标</th>\n",
       "      <td>0.118887</td>\n",
       "      <td>-0.036839</td>\n",
       "      <td>0.018129</td>\n",
       "      <td>0.333095</td>\n",
       "      <td>0.101912</td>\n",
       "      <td>0.321151</td>\n",
       "      <td>-0.067647</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.188509</td>\n",
       "      <td>-0.034481</td>\n",
       "      <td>...</td>\n",
       "      <td>0.063476</td>\n",
       "      <td>0.024444</td>\n",
       "      <td>-0.007965</td>\n",
       "      <td>0.105842</td>\n",
       "      <td>0.073914</td>\n",
       "      <td>-0.215526</td>\n",
       "      <td>-0.108175</td>\n",
       "      <td>-0.333283</td>\n",
       "      <td>-0.224518</td>\n",
       "      <td>-0.233186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>年龄</th>\n",
       "      <td>0.013310</td>\n",
       "      <td>0.110788</td>\n",
       "      <td>-0.034883</td>\n",
       "      <td>-0.148333</td>\n",
       "      <td>0.018291</td>\n",
       "      <td>-0.122771</td>\n",
       "      <td>0.032219</td>\n",
       "      <td>-0.188509</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.042880</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.039071</td>\n",
       "      <td>0.134523</td>\n",
       "      <td>0.077299</td>\n",
       "      <td>-0.009347</td>\n",
       "      <td>-0.027230</td>\n",
       "      <td>0.060550</td>\n",
       "      <td>0.059601</td>\n",
       "      <td>0.114498</td>\n",
       "      <td>0.062823</td>\n",
       "      <td>0.059102</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>性别</th>\n",
       "      <td>-0.138986</td>\n",
       "      <td>0.010539</td>\n",
       "      <td>0.018869</td>\n",
       "      <td>0.035241</td>\n",
       "      <td>-0.152662</td>\n",
       "      <td>0.093920</td>\n",
       "      <td>0.049123</td>\n",
       "      <td>-0.034481</td>\n",
       "      <td>0.042880</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.031160</td>\n",
       "      <td>-0.054970</td>\n",
       "      <td>-0.047446</td>\n",
       "      <td>-0.017230</td>\n",
       "      <td>-0.024719</td>\n",
       "      <td>-0.003299</td>\n",
       "      <td>-0.008456</td>\n",
       "      <td>-0.002132</td>\n",
       "      <td>0.004722</td>\n",
       "      <td>0.002928</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>手机认证</th>\n",
       "      <td>-0.427273</td>\n",
       "      <td>0.021521</td>\n",
       "      <td>-0.078399</td>\n",
       "      <td>0.090241</td>\n",
       "      <td>-0.447305</td>\n",
       "      <td>0.161425</td>\n",
       "      <td>-0.161838</td>\n",
       "      <td>0.099096</td>\n",
       "      <td>-0.068294</td>\n",
       "      <td>-0.020789</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.190757</td>\n",
       "      <td>-0.216192</td>\n",
       "      <td>-0.124946</td>\n",
       "      <td>-0.014176</td>\n",
       "      <td>-0.079669</td>\n",
       "      <td>-0.035606</td>\n",
       "      <td>-0.018048</td>\n",
       "      <td>-0.062779</td>\n",
       "      <td>-0.034303</td>\n",
       "      <td>-0.011205</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>户口认证</th>\n",
       "      <td>-0.092384</td>\n",
       "      <td>-0.087175</td>\n",
       "      <td>0.092187</td>\n",
       "      <td>0.024707</td>\n",
       "      <td>-0.022511</td>\n",
       "      <td>0.052067</td>\n",
       "      <td>-0.093780</td>\n",
       "      <td>0.063476</td>\n",
       "      <td>-0.039071</td>\n",
       "      <td>-0.031160</td>\n",
       "      <td>...</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.091715</td>\n",
       "      <td>-0.108811</td>\n",
       "      <td>-0.049911</td>\n",
       "      <td>-0.048426</td>\n",
       "      <td>-0.063138</td>\n",
       "      <td>-0.068141</td>\n",
       "      <td>-0.132971</td>\n",
       "      <td>-0.078149</td>\n",
       "      <td>-0.104624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>视频认证</th>\n",
       "      <td>0.177651</td>\n",
       "      <td>-0.044062</td>\n",
       "      <td>0.067910</td>\n",
       "      <td>-0.065700</td>\n",
       "      <td>0.244189</td>\n",
       "      <td>-0.043784</td>\n",
       "      <td>-0.115544</td>\n",
       "      <td>0.024444</td>\n",
       "      <td>0.134523</td>\n",
       "      <td>-0.054970</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.091715</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.224103</td>\n",
       "      <td>-0.084989</td>\n",
       "      <td>-0.092784</td>\n",
       "      <td>-0.040825</td>\n",
       "      <td>-0.046872</td>\n",
       "      <td>-0.034651</td>\n",
       "      <td>-0.050307</td>\n",
       "      <td>-0.042390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>学历认证</th>\n",
       "      <td>0.282563</td>\n",
       "      <td>0.026782</td>\n",
       "      <td>-0.040750</td>\n",
       "      <td>0.126911</td>\n",
       "      <td>0.227505</td>\n",
       "      <td>0.073821</td>\n",
       "      <td>0.002210</td>\n",
       "      <td>-0.007965</td>\n",
       "      <td>0.077299</td>\n",
       "      <td>-0.047446</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.108811</td>\n",
       "      <td>-0.224103</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.133158</td>\n",
       "      <td>-0.146963</td>\n",
       "      <td>-0.019072</td>\n",
       "      <td>-0.001079</td>\n",
       "      <td>-0.001931</td>\n",
       "      <td>-0.018059</td>\n",
       "      <td>-0.004171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>征信认证</th>\n",
       "      <td>0.205959</td>\n",
       "      <td>-0.008029</td>\n",
       "      <td>-0.067393</td>\n",
       "      <td>0.098251</td>\n",
       "      <td>0.151380</td>\n",
       "      <td>0.051100</td>\n",
       "      <td>-0.009253</td>\n",
       "      <td>0.105842</td>\n",
       "      <td>-0.009347</td>\n",
       "      <td>-0.017230</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.049911</td>\n",
       "      <td>-0.084989</td>\n",
       "      <td>-0.133158</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.073063</td>\n",
       "      <td>-0.030228</td>\n",
       "      <td>-0.000730</td>\n",
       "      <td>-0.021751</td>\n",
       "      <td>-0.035376</td>\n",
       "      <td>-0.034087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>淘宝认证</th>\n",
       "      <td>0.247217</td>\n",
       "      <td>-0.098078</td>\n",
       "      <td>-0.006896</td>\n",
       "      <td>0.123668</td>\n",
       "      <td>0.223851</td>\n",
       "      <td>0.067633</td>\n",
       "      <td>-0.073750</td>\n",
       "      <td>0.073914</td>\n",
       "      <td>-0.027230</td>\n",
       "      <td>-0.024719</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.048426</td>\n",
       "      <td>-0.092784</td>\n",
       "      <td>-0.146963</td>\n",
       "      <td>-0.073063</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.020321</td>\n",
       "      <td>-0.073208</td>\n",
       "      <td>-0.042504</td>\n",
       "      <td>-0.021862</td>\n",
       "      <td>-0.022558</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>历史成功借款次数</th>\n",
       "      <td>-0.052282</td>\n",
       "      <td>0.013808</td>\n",
       "      <td>-0.005927</td>\n",
       "      <td>-0.131935</td>\n",
       "      <td>-0.049142</td>\n",
       "      <td>-0.129535</td>\n",
       "      <td>0.011290</td>\n",
       "      <td>-0.215526</td>\n",
       "      <td>0.060550</td>\n",
       "      <td>-0.003299</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.063138</td>\n",
       "      <td>-0.040825</td>\n",
       "      <td>-0.019072</td>\n",
       "      <td>-0.030228</td>\n",
       "      <td>-0.020321</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.813881</td>\n",
       "      <td>0.176106</td>\n",
       "      <td>0.981497</td>\n",
       "      <td>0.178501</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>历史成功借款金额</th>\n",
       "      <td>-0.026267</td>\n",
       "      <td>0.312869</td>\n",
       "      <td>-0.052462</td>\n",
       "      <td>-0.143293</td>\n",
       "      <td>-0.027272</td>\n",
       "      <td>-0.120368</td>\n",
       "      <td>0.078866</td>\n",
       "      <td>-0.108175</td>\n",
       "      <td>0.059601</td>\n",
       "      <td>-0.008456</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.068141</td>\n",
       "      <td>-0.046872</td>\n",
       "      <td>-0.001079</td>\n",
       "      <td>-0.000730</td>\n",
       "      <td>-0.073208</td>\n",
       "      <td>0.813881</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.326537</td>\n",
       "      <td>0.798266</td>\n",
       "      <td>0.104329</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>总待还本金</th>\n",
       "      <td>-0.036009</td>\n",
       "      <td>0.202477</td>\n",
       "      <td>-0.004237</td>\n",
       "      <td>-0.221464</td>\n",
       "      <td>-0.038997</td>\n",
       "      <td>-0.199208</td>\n",
       "      <td>0.041561</td>\n",
       "      <td>-0.333283</td>\n",
       "      <td>0.114498</td>\n",
       "      <td>-0.002132</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.132971</td>\n",
       "      <td>-0.034651</td>\n",
       "      <td>-0.001931</td>\n",
       "      <td>-0.021751</td>\n",
       "      <td>-0.042504</td>\n",
       "      <td>0.176106</td>\n",
       "      <td>0.326537</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.199944</td>\n",
       "      <td>0.092197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>历史正常还款期数</th>\n",
       "      <td>-0.061549</td>\n",
       "      <td>0.017565</td>\n",
       "      <td>0.012162</td>\n",
       "      <td>-0.147881</td>\n",
       "      <td>-0.057747</td>\n",
       "      <td>-0.149165</td>\n",
       "      <td>0.020679</td>\n",
       "      <td>-0.224518</td>\n",
       "      <td>0.062823</td>\n",
       "      <td>0.004722</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.078149</td>\n",
       "      <td>-0.050307</td>\n",
       "      <td>-0.018059</td>\n",
       "      <td>-0.035376</td>\n",
       "      <td>-0.021862</td>\n",
       "      <td>0.981497</td>\n",
       "      <td>0.798266</td>\n",
       "      <td>0.199944</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.185501</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>历史逾期还款期数</th>\n",
       "      <td>-0.080047</td>\n",
       "      <td>0.000925</td>\n",
       "      <td>0.029808</td>\n",
       "      <td>-0.020625</td>\n",
       "      <td>-0.071668</td>\n",
       "      <td>0.010953</td>\n",
       "      <td>0.055437</td>\n",
       "      <td>-0.233186</td>\n",
       "      <td>0.059102</td>\n",
       "      <td>0.002928</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.104624</td>\n",
       "      <td>-0.042390</td>\n",
       "      <td>-0.004171</td>\n",
       "      <td>-0.034087</td>\n",
       "      <td>-0.022558</td>\n",
       "      <td>0.178501</td>\n",
       "      <td>0.104329</td>\n",
       "      <td>0.092197</td>\n",
       "      <td>0.185501</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>21 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           ListingId      借款金额      借款期限      借款利率    借款成功日期      初始评级  \\\n",
       "ListingId   1.000000 -0.009941 -0.022482  0.126414  0.965850 -0.052551   \n",
       "借款金额       -0.009941  1.000000  0.011362 -0.333065 -0.010972 -0.269202   \n",
       "借款期限       -0.022482  0.011362  1.000000 -0.123350  0.014799 -0.107753   \n",
       "借款利率        0.126414 -0.333065 -0.123350  1.000000  0.090037  0.794554   \n",
       "借款成功日期      0.965850 -0.010972  0.014799  0.090037  1.000000 -0.073839   \n",
       "初始评级       -0.052551 -0.269202 -0.107753  0.794554 -0.073839  1.000000   \n",
       "借款类型       -0.188825  0.093424  0.007985 -0.095491 -0.194619 -0.064861   \n",
       "是否首标        0.118887 -0.036839  0.018129  0.333095  0.101912  0.321151   \n",
       "年龄          0.013310  0.110788 -0.034883 -0.148333  0.018291 -0.122771   \n",
       "性别         -0.138986  0.010539  0.018869  0.035241 -0.152662  0.093920   \n",
       "手机认证       -0.427273  0.021521 -0.078399  0.090241 -0.447305  0.161425   \n",
       "户口认证       -0.092384 -0.087175  0.092187  0.024707 -0.022511  0.052067   \n",
       "视频认证        0.177651 -0.044062  0.067910 -0.065700  0.244189 -0.043784   \n",
       "学历认证        0.282563  0.026782 -0.040750  0.126911  0.227505  0.073821   \n",
       "征信认证        0.205959 -0.008029 -0.067393  0.098251  0.151380  0.051100   \n",
       "淘宝认证        0.247217 -0.098078 -0.006896  0.123668  0.223851  0.067633   \n",
       "历史成功借款次数   -0.052282  0.013808 -0.005927 -0.131935 -0.049142 -0.129535   \n",
       "历史成功借款金额   -0.026267  0.312869 -0.052462 -0.143293 -0.027272 -0.120368   \n",
       "总待还本金      -0.036009  0.202477 -0.004237 -0.221464 -0.038997 -0.199208   \n",
       "历史正常还款期数   -0.061549  0.017565  0.012162 -0.147881 -0.057747 -0.149165   \n",
       "历史逾期还款期数   -0.080047  0.000925  0.029808 -0.020625 -0.071668  0.010953   \n",
       "\n",
       "               借款类型      是否首标        年龄        性别  ...      户口认证      视频认证  \\\n",
       "ListingId -0.188825  0.118887  0.013310 -0.138986  ... -0.092384  0.177651   \n",
       "借款金额       0.093424 -0.036839  0.110788  0.010539  ... -0.087175 -0.044062   \n",
       "借款期限       0.007985  0.018129 -0.034883  0.018869  ...  0.092187  0.067910   \n",
       "借款利率      -0.095491  0.333095 -0.148333  0.035241  ...  0.024707 -0.065700   \n",
       "借款成功日期    -0.194619  0.101912  0.018291 -0.152662  ... -0.022511  0.244189   \n",
       "初始评级      -0.064861  0.321151 -0.122771  0.093920  ...  0.052067 -0.043784   \n",
       "借款类型       1.000000 -0.067647  0.032219  0.049123  ... -0.093780 -0.115544   \n",
       "是否首标      -0.067647  1.000000 -0.188509 -0.034481  ...  0.063476  0.024444   \n",
       "年龄         0.032219 -0.188509  1.000000  0.042880  ... -0.039071  0.134523   \n",
       "性别         0.049123 -0.034481  0.042880  1.000000  ... -0.031160 -0.054970   \n",
       "手机认证      -0.161838  0.099096 -0.068294 -0.020789  ... -0.190757 -0.216192   \n",
       "户口认证      -0.093780  0.063476 -0.039071 -0.031160  ...  1.000000 -0.091715   \n",
       "视频认证      -0.115544  0.024444  0.134523 -0.054970  ... -0.091715  1.000000   \n",
       "学历认证       0.002210 -0.007965  0.077299 -0.047446  ... -0.108811 -0.224103   \n",
       "征信认证      -0.009253  0.105842 -0.009347 -0.017230  ... -0.049911 -0.084989   \n",
       "淘宝认证      -0.073750  0.073914 -0.027230 -0.024719  ... -0.048426 -0.092784   \n",
       "历史成功借款次数   0.011290 -0.215526  0.060550 -0.003299  ... -0.063138 -0.040825   \n",
       "历史成功借款金额   0.078866 -0.108175  0.059601 -0.008456  ... -0.068141 -0.046872   \n",
       "总待还本金      0.041561 -0.333283  0.114498 -0.002132  ... -0.132971 -0.034651   \n",
       "历史正常还款期数   0.020679 -0.224518  0.062823  0.004722  ... -0.078149 -0.050307   \n",
       "历史逾期还款期数   0.055437 -0.233186  0.059102  0.002928  ... -0.104624 -0.042390   \n",
       "\n",
       "               学历认证      征信认证      淘宝认证  历史成功借款次数  历史成功借款金额     总待还本金  \\\n",
       "ListingId  0.282563  0.205959  0.247217 -0.052282 -0.026267 -0.036009   \n",
       "借款金额       0.026782 -0.008029 -0.098078  0.013808  0.312869  0.202477   \n",
       "借款期限      -0.040750 -0.067393 -0.006896 -0.005927 -0.052462 -0.004237   \n",
       "借款利率       0.126911  0.098251  0.123668 -0.131935 -0.143293 -0.221464   \n",
       "借款成功日期     0.227505  0.151380  0.223851 -0.049142 -0.027272 -0.038997   \n",
       "初始评级       0.073821  0.051100  0.067633 -0.129535 -0.120368 -0.199208   \n",
       "借款类型       0.002210 -0.009253 -0.073750  0.011290  0.078866  0.041561   \n",
       "是否首标      -0.007965  0.105842  0.073914 -0.215526 -0.108175 -0.333283   \n",
       "年龄         0.077299 -0.009347 -0.027230  0.060550  0.059601  0.114498   \n",
       "性别        -0.047446 -0.017230 -0.024719 -0.003299 -0.008456 -0.002132   \n",
       "手机认证      -0.124946 -0.014176 -0.079669 -0.035606 -0.018048 -0.062779   \n",
       "户口认证      -0.108811 -0.049911 -0.048426 -0.063138 -0.068141 -0.132971   \n",
       "视频认证      -0.224103 -0.084989 -0.092784 -0.040825 -0.046872 -0.034651   \n",
       "学历认证       1.000000 -0.133158 -0.146963 -0.019072 -0.001079 -0.001931   \n",
       "征信认证      -0.133158  1.000000 -0.073063 -0.030228 -0.000730 -0.021751   \n",
       "淘宝认证      -0.146963 -0.073063  1.000000 -0.020321 -0.073208 -0.042504   \n",
       "历史成功借款次数  -0.019072 -0.030228 -0.020321  1.000000  0.813881  0.176106   \n",
       "历史成功借款金额  -0.001079 -0.000730 -0.073208  0.813881  1.000000  0.326537   \n",
       "总待还本金     -0.001931 -0.021751 -0.042504  0.176106  0.326537  1.000000   \n",
       "历史正常还款期数  -0.018059 -0.035376 -0.021862  0.981497  0.798266  0.199944   \n",
       "历史逾期还款期数  -0.004171 -0.034087 -0.022558  0.178501  0.104329  0.092197   \n",
       "\n",
       "           历史正常还款期数  历史逾期还款期数  \n",
       "ListingId -0.061549 -0.080047  \n",
       "借款金额       0.017565  0.000925  \n",
       "借款期限       0.012162  0.029808  \n",
       "借款利率      -0.147881 -0.020625  \n",
       "借款成功日期    -0.057747 -0.071668  \n",
       "初始评级      -0.149165  0.010953  \n",
       "借款类型       0.020679  0.055437  \n",
       "是否首标      -0.224518 -0.233186  \n",
       "年龄         0.062823  0.059102  \n",
       "性别         0.004722  0.002928  \n",
       "手机认证      -0.034303 -0.011205  \n",
       "户口认证      -0.078149 -0.104624  \n",
       "视频认证      -0.050307 -0.042390  \n",
       "学历认证      -0.018059 -0.004171  \n",
       "征信认证      -0.035376 -0.034087  \n",
       "淘宝认证      -0.021862 -0.022558  \n",
       "历史成功借款次数   0.981497  0.178501  \n",
       "历史成功借款金额   0.798266  0.104329  \n",
       "总待还本金      0.199944  0.092197  \n",
       "历史正常还款期数   1.000000  0.185501  \n",
       "历史逾期还款期数   0.185501  1.000000  \n",
       "\n",
       "[21 rows x 21 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pairwise Pearson correlation between the columns of `data`.\n",
    "# NOTE(review): the saved output contains rows for text columns such as 初始评级,\n",
    "# so `data` was presumably numerically encoded before this cell ran - confirm the cell order.\n",
    "nu = data.corr(method=\"pearson\")\n",
    "nu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "ceee1b14-5854-46a0-b69b-6dee2f99769c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ListingId   -0.052551\n",
       "借款金额        -0.269202\n",
       "借款期限        -0.107753\n",
       "借款利率         0.794554\n",
       "借款成功日期      -0.073839\n",
       "初始评级         1.000000\n",
       "借款类型        -0.064861\n",
       "是否首标         0.321151\n",
       "年龄          -0.122771\n",
       "性别           0.093920\n",
       "手机认证         0.161425\n",
       "户口认证         0.052067\n",
       "视频认证        -0.043784\n",
       "学历认证         0.073821\n",
       "征信认证         0.051100\n",
       "淘宝认证         0.067633\n",
       "历史成功借款次数    -0.129535\n",
       "历史成功借款金额    -0.120368\n",
       "总待还本金       -0.199208\n",
       "历史正常还款期数    -0.149165\n",
       "历史逾期还款期数     0.010953\n",
       "Name: 初始评级, dtype: float64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Correlation of every column with the target column 初始评级\n",
    "nu.loc[:, '初始评级']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "279237f3-6c86-4184-becd-6ca5b6197d11",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>借款金额</th>\n",
       "      <th>借款期限</th>\n",
       "      <th>借款利率</th>\n",
       "      <th>是否首标</th>\n",
       "      <th>年龄</th>\n",
       "      <th>性别</th>\n",
       "      <th>手机认证</th>\n",
       "      <th>历史成功借款次数</th>\n",
       "      <th>历史成功借款金额</th>\n",
       "      <th>总待还本金</th>\n",
       "      <th>历史正常还款期数</th>\n",
       "      <th>历史逾期还款期数</th>\n",
       "      <th>初始评级</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18000</td>\n",
       "      <td>12</td>\n",
       "      <td>18.0</td>\n",
       "      <td>否</td>\n",
       "      <td>NaN</td>\n",
       "      <td>男</td>\n",
       "      <td>成功认证</td>\n",
       "      <td>11</td>\n",
       "      <td>40326</td>\n",
       "      <td>8712.73</td>\n",
       "      <td>57.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9453</td>\n",
       "      <td>12</td>\n",
       "      <td>20.0</td>\n",
       "      <td>否</td>\n",
       "      <td>NaN</td>\n",
       "      <td>男</td>\n",
       "      <td>未成功认证</td>\n",
       "      <td>4</td>\n",
       "      <td>14500</td>\n",
       "      <td>7890.64</td>\n",
       "      <td>13.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>27000</td>\n",
       "      <td>24</td>\n",
       "      <td>20.0</td>\n",
       "      <td>否</td>\n",
       "      <td>NaN</td>\n",
       "      <td>男</td>\n",
       "      <td>成功认证</td>\n",
       "      <td>5</td>\n",
       "      <td>21894</td>\n",
       "      <td>11726.32</td>\n",
       "      <td>25.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>25000</td>\n",
       "      <td>12</td>\n",
       "      <td>18.0</td>\n",
       "      <td>否</td>\n",
       "      <td>NaN</td>\n",
       "      <td>男</td>\n",
       "      <td>成功认证</td>\n",
       "      <td>6</td>\n",
       "      <td>36190</td>\n",
       "      <td>9703.41</td>\n",
       "      <td>41.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20000</td>\n",
       "      <td>6</td>\n",
       "      <td>16.0</td>\n",
       "      <td>否</td>\n",
       "      <td>NaN</td>\n",
       "      <td>男</td>\n",
       "      <td>成功认证</td>\n",
       "      <td>13</td>\n",
       "      <td>77945</td>\n",
       "      <td>0.00</td>\n",
       "      <td>118.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    借款金额  借款期限  借款利率 是否首标  年龄 性别   手机认证  历史成功借款次数  历史成功借款金额     总待还本金  \\\n",
       "0  18000    12  18.0    否 NaN  男   成功认证        11     40326   8712.73   \n",
       "1   9453    12  20.0    否 NaN  男  未成功认证         4     14500   7890.64   \n",
       "2  27000    24  20.0    否 NaN  男   成功认证         5     21894  11726.32   \n",
       "3  25000    12  18.0    否 NaN  男   成功认证         6     36190   9703.41   \n",
       "4  20000     6  16.0    否 NaN  男   成功认证        13     77945      0.00   \n",
       "\n",
       "   历史正常还款期数  历史逾期还款期数 初始评级  \n",
       "0      57.0      16.0    C  \n",
       "1      13.0       1.0    D  \n",
       "2      25.0       3.0    E  \n",
       "3      41.0       1.0    C  \n",
       "4     118.0      14.0    C  "
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep the modelling features plus the target column (初始评级) from the raw frame\n",
    "selected_columns = [\n",
    "    '借款金额', '借款期限', '借款利率', '是否首标', '年龄', '性别', '手机认证',\n",
    "    '历史成功借款次数', '历史成功借款金额', '总待还本金',\n",
    "    '历史正常还款期数', '历史逾期还款期数', '初始评级',\n",
    "]\n",
    "data_pre = df.loc[:, selected_columns]\n",
    "data_pre.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "f4c2983d-df05-4d69-9599-86ffd8a7caa4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "借款金额        0\n",
      "借款期限        0\n",
      "借款利率        0\n",
      "是否首标        0\n",
      "年龄          0\n",
      "性别          0\n",
      "手机认证        0\n",
      "历史成功借款次数    0\n",
      "历史成功借款金额    0\n",
      "总待还本金       0\n",
      "历史正常还款期数    0\n",
      "历史逾期还款期数    0\n",
      "初始评级        0\n",
      "dtype: int64\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 9999 entries, 0 to 9998\n",
      "Data columns (total 13 columns):\n",
      " #   Column    Non-Null Count  Dtype  \n",
      "---  ------    --------------  -----  \n",
      " 0   借款金额      9999 non-null   int64  \n",
      " 1   借款期限      9999 non-null   int64  \n",
      " 2   借款利率      9999 non-null   float64\n",
      " 3   是否首标      9999 non-null   int32  \n",
      " 4   年龄        9999 non-null   float64\n",
      " 5   性别        9999 non-null   int32  \n",
      " 6   手机认证      9999 non-null   int32  \n",
      " 7   历史成功借款次数  9999 non-null   int64  \n",
      " 8   历史成功借款金额  9999 non-null   int64  \n",
      " 9   总待还本金     9999 non-null   float64\n",
      " 10  历史正常还款期数  9999 non-null   float64\n",
      " 11  历史逾期还款期数  9999 non-null   float64\n",
      " 12  初始评级      9999 non-null   int32  \n",
      "dtypes: float64(5), int32(4), int64(4)\n",
      "memory usage: 859.4 KB\n"
     ]
    }
   ],
   "source": [
    "# Per-column count of missing values, followed by dtypes and non-null counts\n",
    "missing_per_column = data_pre.isna().sum()\n",
    "print(missing_per_column)\n",
    "data_pre.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "e7d057d9-1137-42a6-9420-80e4779c7c9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Missing values: categorical columns get a fixed default label,\n",
    "# numeric columns get their own column mean.\n",
    "categorical_defaults = {'是否首标': '否', '性别': '女', '手机认证': '未成功认证'}\n",
    "for col, default in categorical_defaults.items():\n",
    "    data_pre[col] = data_pre[col].fillna(default)\n",
    "for col in ['年龄', '历史正常还款期数', '历史逾期还款期数']:\n",
    "    data_pre[col] = data_pre[col].fillna(data_pre[col].mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "f97d9e19-c518-42f0-86c8-350feba22b0a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>借款金额</th>\n",
       "      <th>借款期限</th>\n",
       "      <th>借款利率</th>\n",
       "      <th>是否首标</th>\n",
       "      <th>年龄</th>\n",
       "      <th>性别</th>\n",
       "      <th>手机认证</th>\n",
       "      <th>历史成功借款次数</th>\n",
       "      <th>历史成功借款金额</th>\n",
       "      <th>总待还本金</th>\n",
       "      <th>历史正常还款期数</th>\n",
       "      <th>历史逾期还款期数</th>\n",
       "      <th>初始评级</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.035807</td>\n",
       "      <td>0.478261</td>\n",
       "      <td>0.657143</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.303872</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.016949</td>\n",
       "      <td>0.005445</td>\n",
       "      <td>0.050548</td>\n",
       "      <td>0.022736</td>\n",
       "      <td>0.592593</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.018710</td>\n",
       "      <td>0.478261</td>\n",
       "      <td>0.771429</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.303872</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.006163</td>\n",
       "      <td>0.001958</td>\n",
       "      <td>0.045779</td>\n",
       "      <td>0.005185</td>\n",
       "      <td>0.037037</td>\n",
       "      <td>0.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.053811</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.771429</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.303872</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.007704</td>\n",
       "      <td>0.002956</td>\n",
       "      <td>0.068032</td>\n",
       "      <td>0.009972</td>\n",
       "      <td>0.111111</td>\n",
       "      <td>0.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.049810</td>\n",
       "      <td>0.478261</td>\n",
       "      <td>0.657143</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.303872</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.009245</td>\n",
       "      <td>0.004887</td>\n",
       "      <td>0.056296</td>\n",
       "      <td>0.016354</td>\n",
       "      <td>0.037037</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.039808</td>\n",
       "      <td>0.217391</td>\n",
       "      <td>0.542857</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.303872</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.020031</td>\n",
       "      <td>0.010525</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.047068</td>\n",
       "      <td>0.518519</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       借款金额      借款期限      借款利率  是否首标        年龄   性别  手机认证  历史成功借款次数  \\\n",
       "0  0.035807  0.478261  0.657143   0.0  0.303872  1.0   0.0  0.016949   \n",
       "1  0.018710  0.478261  0.771429   0.0  0.303872  1.0   1.0  0.006163   \n",
       "2  0.053811  1.000000  0.771429   0.0  0.303872  1.0   0.0  0.007704   \n",
       "3  0.049810  0.478261  0.657143   0.0  0.303872  1.0   0.0  0.009245   \n",
       "4  0.039808  0.217391  0.542857   0.0  0.303872  1.0   0.0  0.020031   \n",
       "\n",
       "   历史成功借款金额     总待还本金  历史正常还款期数  历史逾期还款期数  初始评级  \n",
       "0  0.005445  0.050548  0.022736  0.592593   0.4  \n",
       "1  0.001958  0.045779  0.005185  0.037037   0.6  \n",
       "2  0.002956  0.068032  0.009972  0.111111   0.8  \n",
       "3  0.004887  0.056296  0.016354  0.037037   0.4  \n",
       "4  0.010525  0.000000  0.047068  0.518519   0.4  "
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2.4 Value mapping: convert string columns to integer codes\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "# Keep one fitted encoder per column so each mapping (including the target's)\n",
    "# can be inverted later via label_encoders[column].inverse_transform(...).\n",
    "label_encoders = {}\n",
    "for column in data_pre.columns:\n",
    "    # Test the dtype explicitly; the previous `dtype == type(object)` compared the\n",
    "    # dtype against the metaclass `type` rather than `object`.\n",
    "    if pd.api.types.is_object_dtype(data_pre[column]) or pd.api.types.is_string_dtype(data_pre[column]):\n",
    "        le = LabelEncoder()\n",
    "        data_pre[column] = le.fit_transform(data_pre[column])\n",
    "        label_encoders[column] = le\n",
    "data_pre.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "15be33dc-0aed-4e07-a30d-0c69ec4d9d69",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       借款金额      借款期限      借款利率  是否首标        年龄   性别  手机认证  历史成功借款次数  \\\n",
      "0  0.035807  0.478261  0.657143   0.0  0.303872  1.0   0.0  0.016949   \n",
      "1  0.018710  0.478261  0.771429   0.0  0.303872  1.0   1.0  0.006163   \n",
      "2  0.053811  1.000000  0.771429   0.0  0.303872  1.0   0.0  0.007704   \n",
      "3  0.049810  0.478261  0.657143   0.0  0.303872  1.0   0.0  0.009245   \n",
      "4  0.039808  0.217391  0.542857   0.0  0.303872  1.0   0.0  0.020031   \n",
      "\n",
      "   历史成功借款金额     总待还本金  历史正常还款期数  历史逾期还款期数  初始评级  \n",
      "0  0.005445  0.050548  0.022736  0.592593   0.4  \n",
      "1  0.001958  0.045779  0.005185  0.037037   0.6  \n",
      "2  0.002956  0.068032  0.009972  0.111111   0.8  \n",
      "3  0.004887  0.056296  0.016354  0.037037   0.4  \n",
      "4  0.010525  0.000000  0.047068  0.518519   0.4  \n"
     ]
    }
   ],
   "source": [
    "# 2.5 Normalisation: min-max scale the feature columns only\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "scaler=MinMaxScaler()\n",
    "# The target 初始评级 must keep its integer class codes: the training and scoring\n",
    "# cells call .astype(int) on it, which would truncate a [0, 1]-scaled target to 0\n",
    "# for every class except the largest one.\n",
    "target = data_pre['初始评级']\n",
    "features = data_pre.drop(columns='初始评级')\n",
    "data_pre = pd.DataFrame(scaler.fit_transform(features), columns=features.columns, index=features.index)\n",
    "data_pre['初始评级'] = target\n",
    "print(data_pre.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "f719a834-1f4d-4ce3-8847-799b91438def",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2.6 Split into training and test sets (30% held out, fixed seed)\n",
    "from sklearn.model_selection import train_test_split\n",
    "y = data_pre['初始评级']\n",
    "x = data_pre.drop(columns='初始评级')\n",
    "x_train, x_test, y_train, y_test = train_test_split(\n",
    "    x, y, test_size=0.3, random_state=42\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "5683190f-efd8-49a6-b87e-bffc09e18ee7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-20 {color: black;background-color: white;}#sk-container-id-20 pre{padding: 0;}#sk-container-id-20 div.sk-toggleable {background-color: white;}#sk-container-id-20 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-20 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-20 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-20 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-20 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-20 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-20 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-20 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-20 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-20 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-20 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-20 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-20 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-20 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-20 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-20 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-20 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-20 div.sk-item {position: relative;z-index: 1;}#sk-container-id-20 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-20 div.sk-item::before, #sk-container-id-20 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-20 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-20 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-20 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-20 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-20 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-20 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-20 div.sk-label-container {text-align: center;}#sk-container-id-20 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-20 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-20\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(max_depth=11)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-20\" type=\"checkbox\" checked><label for=\"sk-estimator-id-20\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(max_depth=11)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "RandomForestClassifier(max_depth=11)"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# *****************4. Model selection and training*****************\n",
    "# Other candidate models (previously commented out here):\n",
    "# DecisionTreeClassifier(max_depth=3, criterion='entropy') and LogisticRegression().\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "# random_state fixes the randomness used while building the forest so that the\n",
    "# fitted model, and the score computed from it, are reproducible across runs.\n",
    "model = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=11, random_state=42)\n",
    "\n",
    "model.fit(x_train, y_train.astype(int))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "67710184-7977-4caa-a0ab-9fce792d1ba4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9683333333333334\n"
     ]
    }
   ],
   "source": [
    "# *****************5. Model prediction and evaluation*****************\n",
    "# Score the fitted model on the held-out test split\n",
    "y_test_int = y_test.astype(int)\n",
    "result = model.score(x_test, y_test_int)\n",
    "print(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
